## This file replicates the meta analysis in Bhatti, Dahlgaard, Hansen, and Hansen (2016)
## "Is door-to-door canvassing effective in Europe? Evidence from a meta-study across six European countries"

## read in packages 
# install.packages("rmeta", "ggplot2", "dplyr") ## uncomment to install packages
library(rmeta)
library(ggplot2)
library(dplyr)
 
# clean environment
rm(list = ls())

## Load American studies 
am_data <- read.table("am_studies.csv", 
                      header = TRUE,
                      sep = ";")
am_data <- am_data[,1:4]

colnames(am_data) <- c("label", "CACE", "ses", "c_rate")
am_data <- subset(am_data, c_rate != "NA")
am_data$precision <- 1/am_data$ses^2

##Pooling DK studies

eff_DK <- meta.summaries(c(-2.1, -15.4), c(4.6, 13.9))$summary
se_DK  <- meta.summaries(c(-2.1, -15.4), c(4.6, 13.9))$se.summary

##meta analysis of European studies 

# assume knocked doors are treated through leaflet
effects <- c(6.7, 0.4, 0.5, 3.6, -8.5, 2.7, eff_DK)
ses     <- c(3.7, 0.8, 0.7, 1.9,  4.3, 1.7, se_DK)

# Slternative pooled estimate where leaflets in Pons & Liegey and Pons are assumed to have no effecteffects_1 <- c(6.7, 0.4/0.49,  0.5/0.48, 3.6, -8.5, 2.7, eff_DK)
ses_1     <- c(3.7, 0.8/0.49,  0.7/0.48, 1.9,  4.3, 1.7, se_DK)

meta   <- meta.summaries(effects  ,ses)
meta_1 <- meta.summaries(effects_1, ses_1)

# diplay estimates 
meta
meta_1
meta$se.summary
meta_1$se.summary

##Add GMA (2013) estimate without John & Brannan (2008)

GMA.est <- 2.5
GMA.se  <- (3.22 - 1.77)/(2*qnorm(.975))

label <- c("John and Brannan (2008), England", 
           "Pons and Liegey (2016), France", 
           "Pons (2016), France", 
           "Nyman (2014), Sweden", 
           "Foos and John (2016), England",
           "Cantoni and Pons (2016), Italy",
           "Bhatti et al. (2014b), Denmark", 
           "Weighted European estimate",
           "Weighted American estimate")

## Create figure 1 -- plot of study effects and pooled effects 

metaplot(c(effects,meta$summary,GMA.est),c(ses,meta$se.summary,GMA.se), 
         xlab="CACE", 
         label= label,
         boxsize=.5)

## Are the effects in Europe and US "statistically" different?

set.seed(121015) # date of first submission
sim_EU <- rnorm(10000000, mean = meta$summary, sd = meta$se.summary)
sim_US <- rnorm(10000000, mean = GMA.est, sd = GMA.se)
mean(sim_EU > sim_US)

# Use alternative pooled estimate where leaflets in Pons & Liegey and Pons are assumed to have no effect
sim_EU_1 <- rnorm(10000000, mean = meta_1$summary, sd = meta_1$se.summary)
mean(sim_EU_1 > sim_US)
## Plot of CACEs against turnout rate

control_rate <- 
  c(0.463, 0.342, 0.808, 0.453, .307, .747, 0.618, 0.536) * 100

CACE <- 
  c(effects[-7], -2.1, -15.4)

CACE_se <- 
  c(ses[-7], 4.6, 13.8)

label2 <- c(label[1:6],
            "Bhatti et al. (2014b), Copenhagen",
            "Bhatti et al. (2014b), Randers")
plot_data <- data.frame(CACE, CACE_se, control_rate, label2)
colnames(plot_data) <- c("CACE", "ses", "c_rate", "label")

# predict values from linear model weighted by precision
mod <- lm(CACE ~ c_rate, data = plot_data, weights = 1/(ses^2))
summary(mod)
plot_data$l_fit <- mod$fitted.values
plot_data$precision <- 1/(plot_data$ses^2)

# predict values from linear model for American studies
am_data$l_fit <- 
  lm(CACE ~ c_rate, data = am_data, weights = precision)$fitted.values

summary(lm(CACE ~ c_rate, data = am_data, weights = precision))

plot <-
  ggplot(plot_data, aes(c_rate, CACE, label = label)) + 
  geom_point(data = am_data, 
             aes(c_rate,
                 CACE,
                 size = precision,
                 alpha = 0.2))  +
  geom_smooth(data = am_data,
              aes(x = c_rate,
                  y = l_fit),
              col = "grey",
              linetype = "dashed") +
  geom_text(size = 4,
            vjust = 2,
            hjust = c(.5,.25,.75,.5,0,.5,.5,.5)) +
  geom_point(aes(alpha = 1, size = precision)) +
  theme_classic() +
  scale_size_continuous(guide = FALSE) +
  ylab("CACE") +
  xlab("Control group turnout") +
  geom_smooth(data = plot_data,
              aes(x = c_rate,
                  y = l_fit),
              col = "black") +
  scale_x_continuous(limits = c(0,90)) +
  scale_y_continuous(limits = c(-16, 20)) +
  scale_alpha_continuous(name   = "",
                         range  = c(0.2, 1),
                         breaks = c(0.2, 1),
                         labels = c("US", "Europe"))

plot

## run models with American and European data to test for curvelinear relationships

# EUROPEAN DATA 
# add control group turnout squared
plot_data <- 
  plot_data %>%
  mutate(c_rate2  = c_rate * c_rate)

# run model
mod_sq_EU <- lm(CACE ~ c_rate + c_rate2, 
                data = plot_data, 
                weights = 1/(ses^2))
# summarize model
summary(mod_sq_EU)
# append fitted values
plot_data$predict_inv <- 
  mod_sq_EU$fitted.values
# plot points and fitted values 
ggplot(data = plot_data, aes(x = c_rate, y = CACE)) +
  geom_point() + 
  geom_line(aes(x = c_rate, y = predict_inv))

## REPEAT FOR AMERICAN DATA

am_data <- 
  am_data %>%
  mutate(c_rate2  = c_rate * c_rate)

mod_sq_AM <- lm(CACE ~ c_rate + c_rate2, 
                data = am_data, 
                weights = 1/(ses^2))

summary(mod_sq_AM)

am_data$predict_inv <- 
  mod_sq_AM$fitted.values

ggplot(data = am_data, aes(x = c_rate, y = CACE)) +
  geom_point() + 
  geom_line(aes(x = c_rate, y = predict_inv))
